package WebsiteCase

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}

object LogAnalyerSQL {

  /** Default input location, used when no path is given on the command line. */
  private val DefaultLogPath =
    "/Users/hongyi/IdeaProjects/SparkSQLTest/src/main/scala/WebsiteCase/log.txt"

  /**
   * Entry point: loads an Apache access log, parses each line into an
   * [[ApacheLog]], registers the result as the temp table `log`, and caches
   * it for the (currently commented-out) analysis queries below.
   *
   * @param args optional; `args(0)` overrides the input log file path
   */
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setMaster("local").setAppName("loganalyer")
    val sc = new SparkContext(conf)
    try {
      // Allow the log path to be supplied as the first CLI argument;
      // fall back to the original hard-coded default for compatibility.
      val logPath = args.headOption.getOrElse(DefaultLogPath)
      val logRdd: RDD[ApacheLog] = sc.textFile(logPath).map(ApacheLog.parseLog)
      val sqlContext: SQLContext = new SQLContext(sc)

      import sqlContext.implicits._
      val df = logRdd.toDF()

      df.registerTempTable("log")
      // Cache the table: every query below would rescan the same data otherwise.
      sqlContext.cacheTable("log")
      // Requirement 1: max / min / avg of content size
      //sqlContext.sql("select avg(contentSize) avg_contentSize,max(contentSize) max_contentSize,min(contentSize) min_contentSize from log").show()
      // Requirement 2: number of occurrences of each response code
      // sqlContext.sql("select responseCode,count(responseCode) count_responseCode from log group by responseCode").show()
      // Requirement 3: which IPs accessed more than n times (n = 2 here)
      //sqlContext.sql("select count(ipAddress) total,ipAddress from log group by ipAddress having total>2").show()
      // Requirement 4: top three most-visited endpoints
      //sqlContext.sql("select count(endPoint) total,endPoint from log group by endPoint order by total desc limit 3 ").show()
    } finally {
      // Always release Spark resources, even if loading/parsing fails.
      sc.stop()
    }
  }

}
